import numpy as np
import math
import pandas as pd
import datetime
from datetime import datetime, date
from copy import deepcopy
from pandas_datareader.data import DataReader
import matplotlib.pyplot as plt
from scipy.stats import norm
import cvxpy as cvx
import seaborn as sns
%matplotlib inline
import pypfopt
from pypfopt import risk_models
from pypfopt import plotting
from pypfopt import expected_returns
from pypfopt import EfficientFrontier
# get adjusted close price
# get adjusted close price
def get_adjre_for_multiple_stocks_csv(tickers, start_date='2015-01-01', end_date='2020-01-01'):
    """Fetch Yahoo 'Adj Close' prices for every ticker into one DataFrame.

    Columns are named after the tickers and every series is left-aligned
    to the first ticker's date index. The first row is dropped and any
    remaining gaps are filled with 0 (original behaviour preserved).
    """
    # .copy() so the column assignments below write into an independent
    # frame instead of a slice (fixes pandas SettingWithCopyWarning);
    # the original also created a throwaway duplicate 'Adj Close' column.
    stocks = DataReader(tickers[0], 'yahoo', start_date, end_date)[['Adj Close']].copy()
    stocks.columns = [tickers[0]]
    for ticker in tickers[1:]:
        s = DataReader(ticker, 'yahoo', start_date, end_date)
        # assignment aligns on stocks' index, i.e. a left join on dates
        stocks[ticker] = s['Adj Close']
    return stocks.iloc[1:].fillna(0)
#get log returns
# get log returns
def get_logre(tickers, start_date='2015-01-01', end_date='2020-01-01'):
    """Return {ticker: Series of daily log returns} from Yahoo 'Adj Close'.

    The first return of each series is NaN (no previous close).
    """
    result = {}
    for ticker in tickers:
        s = DataReader(ticker, 'yahoo', start_date, end_date)
        # NOTE: the original looped over prices rebinding the loop variable
        # to np.nan -- a no-op, removed; np.log already propagates NaN.
        adj = s['Adj Close']
        s['log returns'] = np.log(adj / adj.shift(1))
        result[ticker] = s['log returns']
    return result
# 30 technology-sector tickers making up the portfolio under study
tickers = ["GOOGL", "GOOG", "FB", "AMZN", "EBAY", "CSCO", "JNPR", "QCOM", "ACN", "AKAM", "GPN", "IBM", "V", "INTC", "KLAC", "NXPI", "SWKS", "XLNX", "CHKP", "CTXS", "MSFT", "ORCL", "SNPS", "VMW", "AAPL", "IAC", "HPQ", "NTAP", "STX", "WDC"]
# adjusted close prices, one column per ticker
portfolio_adjre = get_adjre_for_multiple_stocks_csv(tickers)
portfolio_adjre = pd.DataFrame(portfolio_adjre)
portfolio_adjre.head()
# daily log returns; drop the first row (NaN from the shift)
portfolio_logre = get_logre(tickers)
portfolio_logre = (pd.DataFrame(portfolio_logre)).iloc[1:]
portfolio_logre.head()
# ROGSX mutual fund serves as the benchmark
benchmark_logre = get_logre(['ROGSX'])
benchmark_logre = pd.DataFrame(benchmark_logre)
benchmark_logre.head()
# correlation heatmap of the assets' adjusted close prices
plt.figure(figsize=(20, 10))
sns.heatmap(round(portfolio_adjre.corr(), 2), cmap=sns.diverging_palette(160, 10, n = 20), annot=True, vmin=-1, vmax=1)
plt.title("Asset Correlations")
# cumulative return in % = expanding sum of daily log returns
portfolio_logre[1:].expanding().apply(lambda x: sum(x) * 100, raw = True).plot(figsize = (20, 10))
#portfolio_logre.plot(figsize = (20, 10))
plt.title("Asset Returns")
plt.ylabel("Returns (%)")
plt.axhline(y=0, color='black', linestyle='--')
# annualised volatility per asset
# NOTE(review): 260 trading days used here but 252 for the Sharpe ratio
# below -- confirm which convention is intended
ax = round(portfolio_logre.std() * 260 ** 0.5 * 100, 2).plot(figsize = (20, 10), kind = "bar")
plt.title("Volatility")
plt.ylabel("Volatility (%)")
for p in ax.patches:
    ax.annotate(str(p.get_height()), (p.get_x() + 0.15, p.get_height() + 0.1))
# Annualized sharpe ratio. Have to multiply by square root of 252 to annualize
ax = round(portfolio_logre.mean(axis = 0) / portfolio_logre.std(axis = 0) * 252 ** 0.5,
2).plot(kind="bar", figsize = (20, 10))
plt.title("Sharpe Ratios")
for p in ax.patches:
    ax.annotate(str(p.get_height()), (p.get_x() + 0.15, p.get_height() + 0.01))
#CSCO, JNPR, QCOM, ACN, AKAM, IBM, KLAC,XLNX,CHKP,MSFT,ORCL,SNPS,VMW,APPL,HPQ,NTAP,STX,WDC, CTXS (these stocks are those whose sector was stated as technology on Yahoo Finance, hence their industry was taken instead)
# ticker -> sector label, used for the optimiser's sector constraints
# NOTE(review): "IBM" and "CTXS" map to values with a LEADING SPACE
# (" Information Technology Services", " Software—Application"), so they
# count as sectors distinct from the unspaced variants -- confirm intended
sector_mapper = {"GOOGL": "Communication Services", "GOOG":"Communication Services", "FB":"Communication Services", "AMZN" : "Consumer Cyclical", "EBAY":"Consumer Cyclical", "CSCO":"Communication Equipment", "JNPR": "Communication Equipment", "QCOM":"Semiconductors", "ACN":"Information Technology Services", "AKAM" : "Software—Infrastructure",
"GPN" : "Consumer Defensive","IBM":" Information Technology Services", "V":"Financial Services", "INTC":"Semiconductors", "KLAC":"Semiconductor Equipment & Materials", "NXPI":"Semiconductors","SWKS":"Semiconductors","XLNX":"Semiconductors", "CHKP":"Software—Infrastructure","CTXS":" Software—Application", "MSFT":"Software—Infrastructure",
"ORCL":"Software—Infrastructure","SNPS":"Software—Infrastructure","VMW":"Software—Infrastructure","AAPL":"Consumer Electronics","IAC":"Communication Services","HPQ":"Computer Hardware", "NTAP":"Computer Hardware","STX":"Computer Hardware","WDC":"Computer Hardware"}
# unique sector labels
sectors = set(sector_mapper.values())
sectors
import collections
# how many portfolio tickers fall in each sector
sector_count = collections.Counter(sector_mapper.values())
print(sector_count)
# per-sector mean daily log return
sector_logre = deepcopy(portfolio_logre)
#rename columns to their sectors
# FIX: map each column by its ticker key instead of assigning
# sector_mapper.values(), which silently relied on the dict's insertion
# order matching the DataFrame's column order
sector_logre.columns = [sector_mapper[t] for t in sector_logre.columns]
#each sector, mean log return (group the same-named sector columns)
sector_logre = sector_logre.groupby(level = 0, axis = 1).mean()
sector_logre.head()
# sector-level correlation heatmap
plt.figure(figsize=(15, 10))
sns.heatmap(round(sector_logre.corr(), 2), cmap=sns.diverging_palette(160, 10, n = 20), annot=True, vmin=0, vmax=1)
plt.title("Sector Correlations")
from matplotlib import cm
cmap = cm.get_cmap('Set1')
# cumulative sector returns in %
sector_logre[1:].expanding(0).apply(lambda x: sum(x) * 100, raw = True).plot(figsize = (20, 10), cmap=cmap )
plt.title("Sector Returns")
plt.ylabel("Returns (%)")
plt.axhline(y=0, color='black', linestyle='--')
# annualised sector volatility (260-day convention, as for assets above)
ax = round(sector_logre.std() * 260 ** 0.5 * 100, 2).plot(figsize = (20, 10), kind = "bar")
plt.title("Volatility")
plt.ylabel("Volatility (%)")
for p in ax.patches:
    ax.annotate(str(p.get_height()), (p.get_x() + 0.15, p.get_height() + 0.1))
# Annualized sharpe ratio. Have to multiply by square root of 252 to annualize
ax = round(sector_logre.mean(axis = 0) / sector_logre.std(axis = 0) * 252 ** 0.5,
2).plot(kind="bar", figsize = (20, 10))
plt.title("Sharpe Ratios")
for p in ax.patches:
    ax.annotate(str(p.get_height()), (p.get_x() + 0.15, p.get_height() + 0.01))
# sector weight bounds fed to EfficientFrontier.add_sector_constraints
sector_lower = {"Software—Infrastructure":0.2, "Semiconductors": 0.1}
sector_upper = {"Consumer Cyclical":0.07}
# can allocate based on number of stocks too, or future expectations of performance of sector
# in-sample / out-of-sample split on the date index
in_sample = "2017-12-31" #3 years of data: 2015, 2016, 2017
is_adjre_df = portfolio_adjre.loc[:in_sample]
oos_adjre_df = portfolio_adjre.loc[in_sample:].iloc[1:]
is_logre_df = portfolio_logre.loc[:in_sample]
oos_logre_df = portfolio_logre.loc[in_sample:].iloc[1:] # one day after in_sample date
# calculate expected returns (in-sample only)
er_exp = expected_returns.ema_historical_return(is_logre_df, returns_data=True)
#Calculate the exponentially-weighted mean of (daily) historical returns, giving higher weight to more recent data.
#Calculate sample covariance (in-sample only)
s = risk_models.sample_cov(is_logre_df, returns_data=True)
#portfolio 1: max sharpe ratio
p1 = EfficientFrontier(er_exp, s)
p1.max_sharpe()
#max sharpe ratio weights, rounded/cleaned by pypfopt
w1 = p1.clean_weights()
# weighted daily log return of portfolio 1 over the FULL period
max_sharpe = (portfolio_logre*w1).sum(axis = 1)
cmap = cm.get_cmap("hot")
pd.Series(w1).plot.pie(figsize=(10,10), cmap=cmap);
w1
# portfolio 2: given max volatility of 15%, maximise returns
# but we shouldn't focus on this because we are aggressive, so we shouldn't
# set volatility as a constraint (technically we aren't afraid of volatility)
p2 = EfficientFrontier(er_exp,s)
p2.efficient_risk(target_volatility = 0.15)
#max returns, given 15% max volatility of portfolio
w2 = p2.clean_weights()
# weighted daily log return of portfolio 2 over the full period
portfolio2 = (portfolio_logre*w2).sum(axis = 1)
w2
cmap = cm.get_cmap("Reds")
pd.Series(w2).plot.pie(figsize=(10,10), cmap=cmap);
#portfolio 3: given min return of 40%, min volatility
p3 = EfficientFrontier(er_exp, s)
p3.add_sector_constraints(sector_mapper, sector_lower, sector_upper)
# force a minimum 2.5% allocation to each core holding.
# FIX: replaces six copy-pasted index/constraint pairs; the index is
# bound as a lambda default argument so each constraint keeps its own
# value (avoids the late-binding closure pitfall).
for core in ["GOOGL", "GOOG", "FB", "AMZN", "MSFT", "AAPL"]:
    idx = p3.tickers.index(core)
    p3.add_constraint(lambda w, i=idx: w[i] >= 0.025)
# minimise volatility subject to a 40% target return
p3.efficient_return(target_return = 0.40)
w3 = p3.clean_weights()
# weighted daily log return of portfolio 3 over the full period
portfolio3 = (portfolio_logre*w3).sum(axis = 1)
w3
cmap = cm.get_cmap("Greens")
pd.Series(w3).plot.pie(figsize=(10,10), cmap=cmap);
from pypfopt import objective_functions
# portfolio 3b: same as portfolio 3 plus L2 regularisation, which spreads
# the weights across more holdings
p3b = EfficientFrontier(er_exp, s)
p3b.add_sector_constraints(sector_mapper, sector_lower, sector_upper)
# force a minimum 2.5% allocation to each core holding.
# FIX: replaces six copy-pasted index/constraint pairs; the index is
# bound as a lambda default argument so each constraint keeps its own value.
for core in ["GOOGL", "GOOG", "FB", "AMZN", "MSFT", "AAPL"]:
    idx = p3b.tickers.index(core)
    p3b.add_constraint(lambda w, i=idx: w[i] >= 0.025)
p3b.add_objective(objective_functions.L2_reg, gamma=2)
p3b.efficient_return(target_return = 0.40)
w3b = p3b.clean_weights()
# weighted daily log return of portfolio 3b over the full period
portfolio3b = (portfolio_logre*w3b).sum(axis = 1)
w3b
cmap = cm.get_cmap("RdPu")
pd.Series(w3b).plot.pie(figsize=(10,10), cmap=cmap);
# combine benchmark and all candidate portfolios for comparison
all_portfolio_returns = pd.concat([benchmark_logre,max_sharpe, portfolio2, portfolio3, portfolio3b], axis = 1)
all_portfolio_returns.columns = ["benchmark", "max_sharpe", "portfolio2", "portfolio3", "portfolio3b"]
# split at the in-sample cutoff (out-of-sample starts the next trading day)
in_sample_results = all_portfolio_returns[:in_sample]
oos_results = all_portfolio_returns[in_sample:].iloc[1:]
ax = round(in_sample_results.std(axis = 0) * 252 ** 0.5, 2).plot(kind="bar", figsize = (15, 5))
#avg 252 trading days a year
plt.title("In Sample Volatility")
for p in ax.patches:
    ax.annotate(str(p.get_height()), (p.get_x() + 0.15, p.get_height() + 0.001))
# FIX: the Sharpe annualisation factor here was 260 while the volatility
# above (and the file's own comments) use 252 -- made consistent at 252
ax = round(in_sample_results.mean(axis = 0) / in_sample_results.std(axis = 0) * 252 ** 0.5, 2).plot(kind="bar", figsize = (15, 5))
plt.title("In Sample Sharpe Ratios")
for p in ax.patches:
    ax.annotate(str(p.get_height()), (p.get_x() + 0.15, p.get_height() + 0.01))
# cumulative in-sample returns in %
in_sample_results[1:].expanding(0).apply(lambda x: sum(x) * 100, raw = True).plot(figsize = (20, 10))
plt.title("In Sample Portfolio Returns")
plt.ylabel("Returns (%)")
plt.axhline(y=0, color='black', linestyle='--')
ax = round(oos_results.std(axis = 0) * 252 ** 0.5, 2).plot(kind="bar", figsize = (15, 5))
#avg 252 trading days a year
plt.title("Out of Sample Volatility")
for p in ax.patches:
    ax.annotate(str(p.get_height()), (p.get_x() + 0.15, p.get_height() + 0.001))
# FIX: 260 -> 252, same consistency fix as the in-sample Sharpe above
ax = round(oos_results.mean(axis = 0) / oos_results.std(axis = 0) * 252 ** 0.5, 2).plot(kind="bar", figsize = (15, 5))
plt.title("Out of Sample Sharpe Ratios")
for p in ax.patches:
    ax.annotate(str(p.get_height()), (p.get_x() + 0.15, p.get_height() + 0.01))
# cumulative out-of-sample returns in %
oos_results[1:].expanding(0).apply(lambda x: sum(x) * 100, raw = True).plot(figsize = (20, 10))
plt.title("Out of Sample Portfolio Returns")
plt.ylabel("Returns (%)")
plt.axhline(y=0, color='black', linestyle='--')
# redisplay the regularised weights, then print pypfopt's
# (expected return, volatility, Sharpe) triple for each portfolio
w3b
p1.portfolio_performance(verbose=True)
p2.portfolio_performance(verbose=True)
p3.portfolio_performance(verbose=True)
p3b.portfolio_performance(verbose=True)
from __future__ import division
import cvxpy as cvx
import numpy as np
import pandas as pd
from pandas_datareader.data import DataReader
import datetime
from datetime import datetime, date
import statsmodels.api as sm
import matplotlib.pyplot as plt
import matplotlib.dates as dt
from functools import reduce
import math as math
from math import sqrt, exp
import seaborn as sns
from copy import deepcopy
from scipy.stats import norm
%matplotlib inline
import collections
from collections import OrderedDict
#obtain portfolio of stocks
#code taken from tutorial 3 optimisation
def get_data_for_multiple_stocks(tickers, start_date, end_date):
    """Fetch Yahoo 'Adj Close' prices for each ticker into one DataFrame.

    dates: 'YYYY-MM-DD'. Columns are named after the tickers; every series
    is left-aligned to the first ticker's date index. The first row is
    dropped and remaining gaps are filled with 0 (original behaviour).
    """
    # .copy() so later column assignments write into an independent frame
    # (fixes pandas SettingWithCopyWarning in the original)
    stocks = DataReader(tickers[0], 'yahoo', start_date, end_date)[['Adj Close']].copy()
    stocks.columns = [tickers[0]]
    for ticker in tickers[1:]:
        s = DataReader(ticker, 'yahoo', start_date, end_date)
        # assignment aligns on stocks' index (left join on dates)
        stocks[ticker] = s['Adj Close']
    return stocks.iloc[1:].fillna(0)
# 10-year US Treasury yield (^TNX) as the risk-free rate, 1/3/5-year windows
risk_free = ['^TNX']
tnx1 = get_data_for_multiple_stocks(risk_free, start_date = "2019-01-01",end_date= "2020-01-01")
tnx3 = get_data_for_multiple_stocks(risk_free, start_date = "2017-01-01", end_date = "2020-01-01")
tnx5 = get_data_for_multiple_stocks(risk_free, start_date = "2015-01-01", end_date = "2020-01-01")
tnx1
# /100 to get returns and then log
# have to deannualise to make it daily returns (25200 = 252 trading days * 100)
tnx1['log_return'] = np.log(tnx1['^TNX']/25200 + 1)
tnx3['log_return'] = np.log(tnx3['^TNX']/25200 + 1)
tnx5['log_return'] = np.log(tnx5['^TNX']/25200 + 1)
tnx1
#retrieved from October 21, 2019 data from https://www.oakfunds.com/assets/files/h/oak-10.31.19-annual-report.pdf
# top-30 holdings of the best-performing fund (ROGSX)
best_tickers = ["GOOGL", "GOOG", "FB", "IAC", "AMZN", "EBAY", "CSCO", "JNPR", "QCOM", "ACN", "AKAM", "GPN", "IBM", "V", "INTC",
"KLAC", "NXPI", "SWKS", "XLNX", "CHKP", "CTXS", "MSFT", "ORCL", "SNPS", "VMW", "AAPL", "HPQ", "NTAP", "STX", "WDC"]
#rogsx is our best fund ticker, we will retrieve their 1, 3, 5 years of data
rogsx1 = get_data_for_multiple_stocks(best_tickers, start_date = "2019-01-01",end_date= "2020-01-01")
rogsx3 = get_data_for_multiple_stocks(best_tickers, start_date = "2017-01-01", end_date = "2020-01-01")
rogsx5 = get_data_for_multiple_stocks(best_tickers, start_date = "2015-01-01", end_date = "2020-01-01")
rogsx1
# share counts per holding, in best_tickers order (from the annual report)
rogsx_shares = [18300, 26276, 143225, 98885, 15400, 336300, 598000, 447000, 159000, 101275, 136200, 128157, 80900, 60000, 557800,
215400, 133550, 113970, 197800, 169700, 129280, 280200, 439641, 159000, 140600, 161720, 480500, 321700, 329400, 204600]
total_best = sum(rogsx_shares)
# convert share counts to portfolio weights; comprehension replaces the
# append loop and avoids shadowing the DataFrame variable `s` used earlier
rogsx_weights = [shares / total_best for shares in rogsx_shares]
# portfolio value = share-weight-weighted sum of adjusted close prices
rogsx1['portfolio_value'] = rogsx1[best_tickers[0]]*rogsx_weights[0]
for i in range(1,30):
    rogsx1['portfolio_value'] += rogsx1[best_tickers[i]]*rogsx_weights[i]
rogsx3['portfolio_value'] = rogsx3[best_tickers[0]]*rogsx_weights[0]
for i in range(1,30):
    rogsx3['portfolio_value'] += rogsx3[best_tickers[i]]*rogsx_weights[i]
rogsx5['portfolio_value'] = rogsx5[best_tickers[0]]*rogsx_weights[0]
for i in range(1,30):
    rogsx5['portfolio_value'] += rogsx5[best_tickers[i]]*rogsx_weights[i]
rogsx1
# daily log return of the weighted portfolio value
rogsx1['prev_close_val'] = rogsx1['portfolio_value'].shift(1)
rogsx1['log_return'] = np.log(rogsx1['portfolio_value']/rogsx1['prev_close_val'])
rogsx3['prev_close_val'] = rogsx3['portfolio_value'].shift(1)
rogsx3['log_return'] = np.log(rogsx3['portfolio_value']/rogsx3['prev_close_val'])
rogsx5['prev_close_val'] = rogsx5['portfolio_value'].shift(1)
rogsx5['log_return'] = np.log(rogsx5['portfolio_value']/rogsx5['prev_close_val'])
rogsx1
# top holdings of the worst-performing fund (FSDCX)
worst_tickers = ["ACIA", "ANET", "CASA", "CIEN", "CSCO", "COMM", "CMTL", "SATS", "ERIC", "EXTR", "FFIV", "INFN", "IDCC", "JNPR",
"LITE", "MSI", "NTCT", "NOK", "PLT", "SWIR", "VSAT", "VIAV", "CLNXF", "ORA.VI", "GOOGL", "FSLY", "CRWD", "RPD"]
# 1, 3 and 5 years of adjusted close prices for the worst fund's holdings
fsdcx1 = get_data_for_multiple_stocks(worst_tickers, start_date = "2019-01-01",end_date= "2020-01-01")
fsdcx3 = get_data_for_multiple_stocks(worst_tickers, start_date = "2017-01-01", end_date = "2020-01-01")
fsdcx5 = get_data_for_multiple_stocks(worst_tickers, start_date = "2015-01-01", end_date = "2020-01-01")
fsdcx1
# share counts per holding, in worst_tickers order (from the fund report)
fsdcx_shares = [29366, 26004, 58869, 247265, 690263, 484767, 49300, 139168, 1803310, 591691, 59426, 14814, 18846, 79735,
87704, 124921, 67791, 1314859, 70800, 163900, 99451, 274212, 37200, 179900, 6, 75502, 20000, 41201]
total_worst = sum(fsdcx_shares)
# convert share counts to portfolio weights; comprehension replaces the
# append loop and avoids shadowing the DataFrame variable `s` used earlier
fsdcx_weights = [shares / total_worst for shares in fsdcx_shares]
# portfolio value = share-weight-weighted sum of adjusted close prices
fsdcx1['portfolio_value'] = fsdcx1[worst_tickers[0]]*fsdcx_weights[0]
for i in range(1,28):
    fsdcx1['portfolio_value'] += fsdcx1[worst_tickers[i]]*fsdcx_weights[i]
fsdcx3['portfolio_value'] = fsdcx3[worst_tickers[0]]*fsdcx_weights[0]
for i in range(1,28):
    fsdcx3['portfolio_value'] += fsdcx3[worst_tickers[i]]*fsdcx_weights[i]
fsdcx5['portfolio_value'] = fsdcx5[worst_tickers[0]]*fsdcx_weights[0]
for i in range(1,28):
    fsdcx5['portfolio_value'] += fsdcx5[worst_tickers[i]]*fsdcx_weights[i]
fsdcx1
# daily log return of the weighted portfolio value
fsdcx1['prev_close_val'] = fsdcx1['portfolio_value'].shift(1)
fsdcx1['log_return'] = np.log(fsdcx1['portfolio_value']/fsdcx1['prev_close_val'])
fsdcx3['prev_close_val'] = fsdcx3['portfolio_value'].shift(1)
fsdcx3['log_return'] = np.log(fsdcx3['portfolio_value']/fsdcx3['prev_close_val'])
fsdcx5['prev_close_val'] = fsdcx5['portfolio_value'].shift(1)
fsdcx5['log_return'] = np.log(fsdcx5['portfolio_value']/fsdcx5['prev_close_val'])
fsdcx1
# tickers of the optimised portfolio (same universe as the first notebook)
optimised_tickers = ["GOOGL", "GOOG", "FB", "AMZN", "EBAY", "CSCO", "JNPR", "QCOM", "ACN", "AKAM", "GPN", "IBM", "V", "INTC", "KLAC",
"NXPI", "SWKS", "XLNX", "CHKP", "CTXS", "MSFT", "ORCL", "SNPS", "VMW", "AAPL", "IAC","HPQ", "NTAP", "STX", "WDC"]
# retrieve 1, 3 and 5 years of data for the optimised portfolio's tickers
optimised1 = get_data_for_multiple_stocks(optimised_tickers, start_date = "2019-01-01",end_date= "2020-01-01")
optimised3 = get_data_for_multiple_stocks(optimised_tickers, start_date = "2017-01-01", end_date = "2020-01-01")
optimised5 = get_data_for_multiple_stocks(optimised_tickers, start_date = "2015-01-01", end_date = "2020-01-01")
optimised1
# weights copied from the optimised portfolio (w3b), in optimised_tickers order
optimised_weights = [0.025, 0.025, 0.0336, 0.07, 0.0,
0.00181, 0.0, 0.0, 0.03346, 0.0,
0.0104, 0.0, 0.02808, 0.1, 0.00104,
0.0, 0.0, 0.0, 0.0, 0.0,
0.08871, 0.0, 0.02998, 0.14565, 0.025,
0.17315, 0.01975, 0.18938, 0.0, 0.0]
# sanity check: weights should sum to ~1
sum(optimised_weights)
# portfolio value = weight-weighted sum of adjusted close prices
optimised1['portfolio_value'] = optimised1[optimised_tickers[0]]*optimised_weights[0]
for i in range(1,30):
    optimised1['portfolio_value'] += optimised1[optimised_tickers[i]]*optimised_weights[i]
optimised3['portfolio_value'] = optimised3[optimised_tickers[0]]*optimised_weights[0]
for i in range(1,30):
    optimised3['portfolio_value'] += optimised3[optimised_tickers[i]]*optimised_weights[i]
optimised5['portfolio_value'] = optimised5[optimised_tickers[0]]*optimised_weights[0]
for i in range(1,30):
    optimised5['portfolio_value'] += optimised5[optimised_tickers[i]]*optimised_weights[i]
optimised1
# daily log return of the weighted portfolio value
optimised1['prev_close_val'] = optimised1['portfolio_value'].shift(1)
optimised1['log_return'] = np.log(optimised1['portfolio_value']/optimised1['prev_close_val'])
optimised3['prev_close_val'] = optimised3['portfolio_value'].shift(1)
optimised3['log_return'] = np.log(optimised3['portfolio_value']/optimised3['prev_close_val'])
optimised5['prev_close_val'] = optimised5['portfolio_value'].shift(1)
optimised5['log_return'] = np.log(optimised5['portfolio_value']/optimised5['prev_close_val'])
optimised1
from copy import deepcopy
# NaN-free working copies for the Sharpe/volatility calculations.
# deepcopy(x) followed by x.dropna(inplace=True) is equivalent to the
# single expression x.dropna(), which already returns a new DataFrame
# and leaves the original untouched.
tnx1year = tnx1.dropna()
tnx3year = tnx3.dropna()
tnx5year = tnx5.dropna()
rogsx1year = rogsx1.dropna()
rogsx3year = rogsx3.dropna()
rogsx5year = rogsx5.dropna()
fsdcx1year = fsdcx1.dropna()
fsdcx3year = fsdcx3.dropna()
fsdcx5year = fsdcx5.dropna()
optimised1year = optimised1.dropna()
optimised3year = optimised3.dropna()
optimised5year = optimised5.dropna()
#all sharpe ratio will minus risk free rate based on US treasury yield
# annualised Sharpe = (mean daily log return - mean daily risk-free
# log return) / std of daily log returns * sqrt(252), per 1/3/5-year window
rogsx_sharpe_ratio = [float(round((rogsx1year['log_return'].mean(axis = 0) - tnx1year['log_return'].mean(axis = 0)) /
rogsx1year['log_return'].std(axis = 0) * 252 ** 0.5, 2)),
float(round((rogsx3year['log_return'].mean(axis = 0)- tnx3year['log_return'].mean(axis = 0)) /
rogsx3year['log_return'].std(axis = 0) * 252 ** 0.5, 2)),
float(round((rogsx5year['log_return'].mean(axis = 0) - tnx5year['log_return'].mean(axis = 0))
/ rogsx5year['log_return'].std(axis = 0) * 252 ** 0.5, 2))]
fsdcx_sharpe_ratio = [float(round((fsdcx1year['log_return'].mean(axis = 0) - tnx1year['log_return'].mean(axis = 0))
/ fsdcx1year['log_return'].std(axis = 0) * 252 ** 0.5, 2)),
float(round((fsdcx3year['log_return'].mean(axis = 0) - tnx3year['log_return'].mean(axis = 0))
/ fsdcx3year['log_return'].std(axis = 0) * 252 ** 0.5, 2)),
float(round((fsdcx5year['log_return'].mean(axis = 0) - tnx5year['log_return'].mean(axis = 0))
/ fsdcx5year['log_return'].std(axis = 0) * 252 ** 0.5, 2))]
optimised_sharpe_ratio = [float(round((optimised1year['log_return'].mean(axis = 0) - tnx1year['log_return'].mean(axis = 0))
/ optimised1year['log_return'].std(axis = 0) * 252 ** 0.5, 2)),
float(round((optimised3year['log_return'].mean(axis = 0) - tnx3year['log_return'].mean(axis = 0))
/ optimised3year['log_return'].std(axis = 0) * 252 ** 0.5, 2)),
float(round((optimised5year['log_return'].mean(axis = 0) - tnx5year['log_return'].mean(axis = 0))
/ optimised5year['log_return'].std(axis = 0) * 252 ** 0.5, 2))]
# summary table: annualised Sharpe ratio per fund per window
years = ['1 year', '3 year','5 year']
Data = {'Year': years,
'ROGSX Sharpe ratio': rogsx_sharpe_ratio,
'FSDCX Sharpe ratio': fsdcx_sharpe_ratio,
'Optimised Sharpe ratio': optimised_sharpe_ratio
}
df = pd.DataFrame(Data,columns=['Year','ROGSX Sharpe ratio', 'FSDCX Sharpe ratio', 'Optimised Sharpe ratio'])
df
# annualised % returns: simple total return over 1 year, geometric mean
# ((end/start)**(1/n) - 1) over 3 and 5 years
# NOTE(review): series[-1] relies on positional fallback of label-based
# [] indexing -- prefer .iloc[-1] / .iloc[0] in current pandas
rogsx_annual_returns = [float(round(((rogsx1['portfolio_value'][-1] / rogsx1['portfolio_value'][0]) - 1 ) * 100, 2)),
float(round(((rogsx3['portfolio_value'][-1] / rogsx3['portfolio_value'][0])**(1/3) - 1 ) * 100, 2)),
float(round( ((rogsx5['portfolio_value'][-1] / rogsx5['portfolio_value'][0])**(1/5) - 1 ) * 100, 2))]
fsdcx_annual_returns = [float(round(((fsdcx1['portfolio_value'][-1] / fsdcx1['portfolio_value'][0]) - 1 ) * 100, 2)),
float(round(((fsdcx3['portfolio_value'][-1] / fsdcx3['portfolio_value'][0])**(1/3) - 1 ) * 100, 2)),
float(round( ((fsdcx5['portfolio_value'][-1] / fsdcx5['portfolio_value'][0])**(1/5) - 1 ) * 100, 2))]
optimised_annual_returns = [float(round(((optimised1['portfolio_value'][-1] / optimised1['portfolio_value'][0]) - 1 ) * 100, 2)),
float(round(((optimised3['portfolio_value'][-1] / optimised3['portfolio_value'][0])**(1/3) - 1 ) * 100, 2)),
float(round( ((optimised5['portfolio_value'][-1] / optimised5['portfolio_value'][0])**(1/5) - 1 ) * 100, 2))]
# summary table: annualised returns per fund per window
years = ['1 year', '3 year','5 year']
Data = {'Year': years,
'ROGSX Annualised Returns(%)': rogsx_annual_returns,
'FSDCX Annualised Returns(%)': fsdcx_annual_returns,
'Optimised Annualised Returns(%)': optimised_annual_returns
}
df = pd.DataFrame(Data,columns=['Year', 'ROGSX Annualised Returns(%)', 'FSDCX Annualised Returns(%)', 'Optimised Annualised Returns(%)'])
df
from math import exp
# annualised volatility in %: daily log-return std converted to a simple
# return via exp, then sqrt-time scaled; (1-exp(s))*-100 == (exp(s)-1)*100
rogsx_stand_dev = [float(round((1-exp(rogsx1year['log_return'].std(axis = 0))) * 252 ** 0.5 * -100, 2)),
float(round((1-exp(rogsx3year['log_return'].std(axis = 0))) * 252 ** 0.5 * -100, 2)),
float(round((1-exp(rogsx5year['log_return'].std(axis = 0))) * 252 ** 0.5 * -100, 2))]
fsdcx_stand_dev = [float(round((1-exp(fsdcx1year['log_return'].std(axis = 0))) * 252 ** 0.5 * -100, 2)),
float(round((1-exp(fsdcx3year['log_return'].std(axis = 0))) * 252 ** 0.5 * -100, 2)),
float(round((1-exp(fsdcx5year['log_return'].std(axis = 0))) * 252 ** 0.5 * -100, 2))]
optimised_stand_dev = [float(round((1-exp(optimised1year['log_return'].std(axis = 0))) * 252 ** 0.5 * -100, 2)),
float(round((1-exp(optimised3year['log_return'].std(axis = 0))) * 252 ** 0.5 * -100, 2)),
float(round((1-exp(optimised5year['log_return'].std(axis = 0))) * 252 ** 0.5 * -100, 2))]
# summary table: annualised volatility per fund per window
years = ['1 year', '3 year','5 year']
Data = {'Year': years,
'ROGSX Volatility (%)': rogsx_stand_dev,
'FSDCX Volatility (%)': fsdcx_stand_dev,
'Optimised Volatility (%)': optimised_stand_dev
}
df = pd.DataFrame(Data,columns=['Year', 'ROGSX Volatility (%)', 'FSDCX Volatility (%)', 'Optimised Volatility (%)'])
df
from __future__ import division
import cvxpy as cvx
import numpy as np
import pandas as pd
from pandas_datareader.data import DataReader
import datetime
from datetime import datetime, date
import statsmodels.api as sm
import matplotlib.pyplot as plt
import matplotlib.dates as dt
from functools import reduce
import math as math
from math import sqrt, exp
import seaborn as sns
from copy import deepcopy
from scipy.stats import norm
%matplotlib inline
import collections
from collections import OrderedDict
#obtain portfolio of stocks
#code taken from tutorial 3 optimisation
def get_data_for_multiple_stocks(tickers, start_date, end_date):
    """Fetch Yahoo 'Adj Close' prices for each ticker into one DataFrame.

    dates: 'YYYY-MM-DD'. Columns are named after the tickers; every series
    is left-aligned to the first ticker's date index. The first row is
    dropped and remaining gaps are filled with 0 (original behaviour).
    """
    # .copy() so later column assignments write into an independent frame
    # (fixes pandas SettingWithCopyWarning in the original)
    stocks = DataReader(tickers[0], 'yahoo', start_date, end_date)[['Adj Close']].copy()
    stocks.columns = [tickers[0]]
    for ticker in tickers[1:]:
        s = DataReader(ticker, 'yahoo', start_date, end_date)
        # assignment aligns on stocks' index (left join on dates)
        stocks[ticker] = s['Adj Close']
    return stocks.iloc[1:].fillna(0)
#tickers not included but are in the holdings, not top 30: GLW, DELL, HPE
#retrieved from October 21, 2019 data from https://www.oakfunds.com/assets/files/h/oak-10.31.19-annual-report.pdf
best_tickers = ["GOOGL", "GOOG", "FB", "IAC", "AMZN", "EBAY", "CSCO", "JNPR", "QCOM", "ACN", "AKAM", "GPN", "IBM", "V", "INTC", "KLAC", "NXPI", "SWKS", "XLNX", "CHKP", "CTXS", "MSFT", "ORCL", "SNPS", "VMW", "AAPL", "HPQ", "NTAP", "STX", "WDC"]
# share counts per holding, in best_tickers order
best_holdings_shares = [18300, 26276, 143225, 98885, 15400, 336300, 598000, 447000, 159000, 101275, 136200, 128157, 80900, 60000, 557800, 215400, 133550, 113970, 197800, 169700, 129280, 280200, 439641, 159000, 140600, 161720, 480500, 321700, 329400, 204600]
# one-row frame: column = ticker, value = shares held
best_holdings = pd.DataFrame(columns=best_tickers)
best_holdings.loc[0] = best_holdings_shares
best_returns_df = get_data_for_multiple_stocks(best_tickers, start_date="2015-01-01", end_date="2020-08-01")
best_returns_df
#worst unable to access: Fidelity Cash Central Fund & Fidelity Securities Lending Cash Central Fund (6.4% of a 101.5% portfolio)
#retrieved from February 29 2020 data from https://www.actionsxchangerepository.fidelity.com/ShowDocument/documentPDF.htm?_fax=-18%2342%23-61%23-110%23114%2378%23117%2320%23-1%2396%2339%23-62%23-21%2386%23-100%2337%2316%2335%23-68%2391%23-66%2354%23103%23-16%2369%23-30%2358%23-20%2376%23-84%23-11%23-87%230%23-50%23-20%23-92%23-98%23-116%23-28%2358%23-38%23-43%23-39%23-42%23-96%23-88%2388%23-45%23106%23114%2374%23124%23-65%23-39%23120%23-89%23-74%235%23-89%23-105%23-67%23126%2377%23-126%2318%23120%2345%23-53%23-98%2364%23-49%23-12%236%23-33%23-50%2340%23-82%2388%23-13%2368%23-79%23-117%2317%23-98%23-87%23-35%23-28%2318%2364%23-76%2355%23-35%2314%23100%2316%23119%2345%23-114%23-115%2326%23-73%23122%23-43%23110%23-17%23-104%23-82%23-116%2314%232%23-71%2388%23-66%23-49%23109%23-101%2357%23-68%2330%23-78%23-22%23100%2362%23-56%23-118%2384%2326%2336%23-75%23-54%2324%2343%23-48%23-44%2368%234%23111%236%23-102%234%23-50%2378%2348%23-38%2396%23-58%23122%23-6%2392%23-52%23-1%2336%2367%2379%23-48%23-119%2354%232%2311%23-64%23
worst_tickers = ["ACIA", "ANET", "CASA", "CIEN", "CSCO", "COMM", "CMTL", "SATS", "ERIC", "EXTR", "FFIV", "INFN", "IDCC", "JNPR", "LITE", "MSI", "NTCT", "NOK", "PLT", "SWIR", "VSAT", "VIAV", "CLNXF", "ORA.VI", "GOOGL", "FSLY", "CRWD", "RPD"]
# one-row frame: column = ticker, value = shares held
worst_holdings = pd.DataFrame(columns=worst_tickers)
worst_holdings_shares = [29366, 26004, 58869, 247265, 690263, 484767, 49300, 139168, 1803310, 591691, 59426, 14814, 18846, 79735, 87704, 124921, 67791, 1314859, 70800, 163900, 99451, 274212, 37200, 179900, 6, 75502, 20000, 41201]
worst_holdings.loc[0] = worst_holdings_shares
worst_returns_df = get_data_for_multiple_stocks(worst_tickers, start_date="2015-01-01", end_date="2020-08-01")
worst_returns_df #only returns data from May 16 2016
best_holdings
worst_holdings
#calculating portfolio value & log return for best portfolio
# Vectorised: price matrix (rows = dates, cols = tickers) times the share
# counts, summed across tickers and rounded to cents. Replaces the
# O(rows x 30) per-cell iloc double loop; .mul aligns on column names.
best_returns_df['portfolio_value'] = (
    best_returns_df[best_tickers]
    .mul(best_holdings.iloc[0].astype(float))
    .sum(axis=1)
    .round(2)
)
best_returns_df['prev_close_value'] = best_returns_df['portfolio_value'].shift(1)
# daily log return of the total portfolio value
best_returns_df['log_return'] = np.log(best_returns_df['portfolio_value']/best_returns_df['prev_close_value'])
# move the date index into a proper 'Date' column
best_returns_df.reset_index(inplace=True)
best_returns_df['Date'] = pd.to_datetime(best_returns_df['Date'], infer_datetime_format=True)
best_returns_df
#From the optimised portfolio convert the weights into a df for easy calculation later
# hard-coded copy of clean_weights() output from the optimisation section
w3b = OrderedDict([('GOOGL', 0.025), ('GOOG', 0.025), ('FB', 0.0336), ('AMZN', 0.07), ('EBAY', 0.0),
('CSCO', 0.00181), ('JNPR', 0.0), ('QCOM', 0.0), ('ACN', 0.03346), ('AKAM', 0.0), ('GPN', 0.0104),
('IBM', 0.0), ('V', 0.02808), ('INTC', 0.1), ('KLAC', 0.00104), ('NXPI', 0.0), ('SWKS', 0.0),
('XLNX', 0.0), ('CHKP', 0.0), ('CTXS', 0.0), ('MSFT', 0.08871), ('ORCL', 0.0), ('SNPS', 0.02998),
('VMW', 0.14565), ('AAPL', 0.025), ('IAC', 0.17315), ('HPQ', 0.01975), ('NTAP', 0.18938),
('STX', 0.0), ('WDC', 0.0)])
w3b_df = pd.DataFrame(w3b, index=[0])
# drop zero-weight tickers
w3b_df = w3b_df.loc[:, (w3b_df != 0).any(axis=0)]
# placeholder rows: row 1 = dollar allocation, row 2 = share count (filled below)
w3b_df.loc[1] = 0
w3b_df.loc[2] = 0
#Calculate the stocks holdings for optimised portfolio based on 2019-12-31 portfolio value
# the optimised portfolio is funded with the best portfolio's final value
end_best_value = best_returns_df.iloc[len(best_returns_df.index)-1, best_returns_df.columns.get_loc("portfolio_value")]
for i in range(len(w3b_df.columns)):
    #second row would be the total amount allocated to that stock based on optimised weightage
    w3b_df.iloc[1, i] = end_best_value * w3b_df.iloc[0, i]
    #third row would be the amount of stock owned based on the total amount allocated to it
    stock_price = best_returns_df.iloc[len(best_returns_df.index)-1, best_returns_df.columns.get_loc(w3b_df.columns[i])]
    #used floor to ensure whole stocks are owned
    w3b_df.iloc[2, i] = math.floor(w3b_df.iloc[1, i]/stock_price)
w3b_df
#first row is the weights of each stock
#second row is the absolute value that is allocated to that stock
#third row is the number of stocks being held, also assumed that the number of stocks stays constant throughout
#get ticker prices for optimised portfolio (only the non-zero-weight tickers remain)
optimised_tickers = list(w3b_df.columns.values)
optimised_names = optimised_tickers
# one-row frame of share counts, columns = tickers
optimised_holdings = (pd.DataFrame(w3b_df.iloc[2])).transpose()
optimised_returns_df = get_data_for_multiple_stocks(optimised_tickers, start_date="2015-01-01", end_date="2020-08-01")
optimised_returns_df
optimised_returns_df
#calculating portfolio value & log return for optimised portfolio
# Vectorised: price matrix times share counts, summed per date and rounded
# to cents. Replaces the per-cell iloc double loop; .mul aligns on columns.
optimised_returns_df['portfolio_value'] = (
    optimised_returns_df[optimised_tickers]
    .mul(optimised_holdings.iloc[0].astype(float))
    .sum(axis=1)
    .round(2)
)
optimised_returns_df['prev_close_value'] = optimised_returns_df['portfolio_value'].shift(1)
# daily log return of the total portfolio value
optimised_returns_df['log_return'] = np.log(optimised_returns_df['portfolio_value']/optimised_returns_df['prev_close_value'])
# move the date index into a proper 'Date' column
optimised_returns_df.reset_index(inplace=True)
optimised_returns_df['Date'] = pd.to_datetime(optimised_returns_df['Date'], infer_datetime_format=True)
optimised_returns_df
#calculating portfolio value & log return for worst portfolio
# Vectorised: price matrix times share counts, summed per date and rounded
# to cents. Replaces the O(rows x 28) per-cell iloc double loop.
worst_returns_df['portfolio_value'] = (
    worst_returns_df[worst_tickers]
    .mul(worst_holdings.iloc[0].astype(float))
    .sum(axis=1)
    .round(2)
)
worst_returns_df['prev_close_value'] = worst_returns_df['portfolio_value'].shift(1)
# daily log return of the total portfolio value
worst_returns_df['log_return'] = np.log(worst_returns_df['portfolio_value']/worst_returns_df['prev_close_value'])
# move the date index into a proper 'Date' column
worst_returns_df.reset_index(inplace=True)
worst_returns_df['Date'] = pd.to_datetime(worst_returns_df['Date'], infer_datetime_format=True)
worst_returns_df
# working copies for the VaR / ES calculations
best_returns_df1 = deepcopy(best_returns_df)
optimised_returns_df1 = deepcopy(optimised_returns_df)
worst_returns_df1 = deepcopy(worst_returns_df)
# unconditional full-sample 5% quantile of daily log returns (reference VaR)
best_quantiles05 = best_returns_df1['log_return'].quantile([0.05])
optimised_quantiles05 = optimised_returns_df1['log_return'].quantile([0.05])
worst_quantiles05 = worst_returns_df1['log_return'].quantile([0.05])
def _add_hs_var(df, window=250, tail=0.05):
    """Rolling 1-day historical-simulation VaR.

    For each day i, 'VaR_HS' is the `tail` quantile of the previous `window`
    daily log returns; 'VaR_HS_Value' converts it to a dollar loss against
    the previous close via V * (1 - exp(VaR)). Days without a full trailing
    window get NaN in both columns.
    """
    for i in range(len(df)):
        past = df.loc[(i - window):(i - 1), 'log_return']
        if len(past) == window:
            q = past.quantile(tail)
            df.loc[i, 'VaR_HS'] = q
            df.loc[i, 'VaR_HS_Value'] = round(abs(df.loc[i, 'prev_close_value'] * (1 - math.exp(q))), 2)
        else:
            # np.nan (np.NaN was removed in numpy 2.0)
            df.loc[i, 'VaR_HS'], df.loc[i, 'VaR_HS_Value'] = np.nan, np.nan

# Historical simulation for the best / optimised / worst portfolios
# (the three original copy-pasted loops were identical except for the frame).
_add_hs_var(best_returns_df1)
best_returns_df1
_add_hs_var(optimised_returns_df1)
optimised_returns_df1
_add_hs_var(worst_returns_df1)
worst_returns_df1
#ES taken from tutorial 3
alpha = 0.05        # tail probability for the 5% ES
confidence = 0.95   # kept at module level: the parametric cells below read it

def _add_hs_es(df, window=250, tail=0.05):
    """Rolling historical-simulation expected shortfall (ES).

    ES is the mean of the worst floor(window*tail + 1) - 1 log returns in the
    trailing window (after dropping NaNs); 'ES_HS_Value' converts it to a
    dollar loss against the previous close. Days without a full window get NaN.
    """
    for i in range(len(df)):
        past = df.loc[(i - window):(i - 1), 'log_return']
        if len(past) == window:
            ordered = sorted(past.dropna())
            cutoff = math.floor(len(ordered) * tail + 1) - 1  # index of the tail cut-off
            tail_sum = sum(ordered[:cutoff])
            df.loc[i, 'sum_log_95'] = tail_sum
            df.loc[i, 'ES_HS'] = tail_sum / len(ordered[:cutoff])
            df.loc[i, 'ES_HS_Value'] = round(abs(df.loc[i, 'prev_close_value'] * (1 - math.exp(df.loc[i, 'ES_HS']))), 2)
        else:
            df.loc[i, 'ES_HS'] = np.nan
            df.loc[i, 'ES_HS_Value'] = np.nan

# ES for the best / optimised / worst portfolios (original had three
# identical copy-pasted loops; an unused per-row `VaR` temporary was dropped).
_add_hs_es(best_returns_df1)
best_returns_df1
_add_hs_es(optimised_returns_df1)
optimised_returns_df1
_add_hs_es(worst_returns_df1)
worst_returns_df1
# Second set of working copies, used for the parametric VaR/ES computations.
best_returns_df2, optimised_returns_df2, worst_returns_df2 = (
    deepcopy(frame)
    for frame in (best_returns_df, optimised_returns_df, worst_returns_df)
)
#parametric for best / optimised / worst portfolios
#computation of 5% VaR and Expected Shortfall
z = norm.ppf(0.05, loc=0, scale=1)  # 5% left-tail z-score (negative)

def _add_parametric(df, window=250):
    """Rolling parametric (normal) VaR and ES.

    (mu, sigma) are estimated over the trailing `window` of log returns;
    VaR is mu + sigma*z and both measures are also converted to dollar
    losses against the previous close. Days without a full window get NaN.

    NOTE(review): ES uses mu + sigma*phi(ppf(confidence))/(1-confidence),
    which lands on the opposite side of the mean from VaR; kept exactly as
    in the source tutorial — confirm the intended sign convention.
    """
    for i in range(len(df)):
        past = df.loc[(i - window):(i - 1), 'log_return']
        if len(past) == window:
            mu = past.mean()
            sigma = past.std()
            var = mu + sigma * z
            es = mu + sigma * norm.pdf(norm.ppf(confidence, loc=0, scale=1), loc=0, scale=1) / (1 - confidence)
            df.loc[i, 'VaR_Parametric'] = var
            df.loc[i, 'VaR_Parametric_Value'] = round(abs(df.loc[i, 'prev_close_value'] * (1 - math.exp(var))), 2)
            df.loc[i, 'ES_Parametric'] = es
            df.loc[i, 'ES_Parametric_Value'] = round(abs(df.loc[i, 'prev_close_value'] * (1 - math.exp(es))), 2)
        else:
            df.loc[i, 'VaR_Parametric'], df.loc[i, 'ES_Parametric'] = np.nan, np.nan

_add_parametric(best_returns_df2)
best_returns_df2
_add_parametric(optimised_returns_df2)
optimised_returns_df2
_add_parametric(worst_returns_df2)
worst_returns_df2
# Copy the risk columns computed on the working copies back onto the main
# frames (df1: historical simulation, df2: parametric). Assignment order
# matches the original so the resulting column order is unchanged.
for main_df, hs_df, par_df in (
    (best_returns_df, best_returns_df1, best_returns_df2),
    (optimised_returns_df, optimised_returns_df1, optimised_returns_df2),
    (worst_returns_df, worst_returns_df1, worst_returns_df2),
):
    main_df['VaR_HS'] = hs_df['VaR_HS']
    main_df['ES_HS'] = hs_df['ES_HS']
    main_df['VaR_Parametric'] = par_df['VaR_Parametric']
    main_df['ES_Parametric'] = par_df['ES_Parametric']
    main_df['VaR_HS_Value'] = hs_df['VaR_HS_Value']
    main_df['ES_HS_Value'] = hs_df['ES_HS_Value']
    main_df['VaR_Parametric_Value'] = par_df['VaR_Parametric_Value']
    main_df['ES_Parametric_Value'] = par_df['ES_Parametric_Value']
worst_returns_df
# Dollar-value plots: one figure for VaR, one for ES, each with a line per
# portfolio/method combination (labels and colours match the original).
for metric, ylab, title in (
    ('VaR', 'Value At Risk (Logged)', '5% daily Value-at-Risk (Logged Absolute Values)'),
    ('ES', 'Expected Shortfall (Logged)', 'Expected Shortfall (Logged Absolute Values)'),
):
    plt.figure(figsize=(20, 10))
    for frame, prefix, hs_style, par_style in (
        (best_returns_df, 'Best', 'b-', 'y-'),
        (optimised_returns_df, 'Optimised', 'c-', 'm-'),
        (worst_returns_df, 'Worst', 'g-', 'r-'),
    ):
        plt.plot(frame['Date'], frame[f'{metric}_HS_Value'], hs_style,
                 label=f'{prefix}_{metric}_HS_Value')
        plt.plot(frame['Date'], frame[f'{metric}_Parametric_Value'], par_style,
                 label=f'{prefix}_{metric}_Parametric_Value')
    plt.grid(True)
    plt.xlabel('Date')
    plt.ylabel(ylab)
    plt.title(title)
    plt.legend(bbox_to_anchor=(1.05, 1), borderaxespad=0.)
    locs, labels = plt.xticks()
    plt.show()
# Latest (last-row) dollar VaR / ES figures; percentage figures follow below.
_last = best_returns_df.iloc[-1]
Best_VaR_HS_Value = _last["VaR_HS_Value"]
Best_VaR_Parametric_Value = _last["VaR_Parametric_Value"]
Best_ES_HS_Value = _last["ES_HS_Value"]
Best_ES_Parametric_Value = _last["ES_Parametric_Value"]
print("Best portfolio: 5% VaR HS value is $" + str(Best_VaR_HS_Value))
print("Best portfolio: 5% VaR parametric value is $" + str(Best_VaR_Parametric_Value))
print("Best portfolio: Expected shortfall HS value is $" + str(Best_ES_HS_Value))
print("Best portfolio: Expected shortfall parametric value is $" + str(Best_ES_Parametric_Value))
_last = optimised_returns_df.iloc[-1]
Optimised_VaR_HS_Value = _last["VaR_HS_Value"]
Optimised_VaR_Parametric_Value = _last["VaR_Parametric_Value"]
Optimised_ES_HS_Value = _last["ES_HS_Value"]
Optimised_ES_Parametric_Value = _last["ES_Parametric_Value"]
print("Optimised portfolio: 5% VaR HS value is $" + str(Optimised_VaR_HS_Value))
print("Optimised portfolio: 5% VaR parametric value is $" + str(Optimised_VaR_Parametric_Value))
print("Optimised portfolio: Expected shortfall HS value is $" + str(Optimised_ES_HS_Value))
print("Optimised portfolio: Expected shortfall parametric value is $" + str(Optimised_ES_Parametric_Value))
_last = worst_returns_df.iloc[-1]
Worst_VaR_HS_Value = _last["VaR_HS_Value"]
Worst_VaR_Parametric_Value = _last["VaR_Parametric_Value"]
Worst_ES_HS_Value = _last["ES_HS_Value"]
Worst_ES_Parametric_Value = _last["ES_Parametric_Value"]
print("Worst portfolio: 5% VaR HS value is $" + str(Worst_VaR_HS_Value))
print("Worst portfolio: 5% VaR parametric value is $" + str(Worst_VaR_Parametric_Value))
print("Worst portfolio: Expected shortfall HS value is $" + str(Worst_ES_HS_Value))
print("Worst portfolio: Expected shortfall parametric value is $" + str(Worst_ES_Parametric_Value))
# Percentage VaR / ES plots. abs() is applied for display only — the stored
# columns keep their (negative) sign, answering the original's open question.
for metric, ylab, title in (
    ('VaR', 'Value At Risk (%)', '5% daily Value-at-Risk (%)'),
    ('ES', 'Expected Shortfall (%)', 'Expected Shortfall (%)'),
):
    plt.figure(figsize=(20, 10))
    for frame, prefix, hs_style, par_style in (
        (best_returns_df, 'Best', 'b-', 'y-'),
        (optimised_returns_df, 'Optimised', 'c-', 'm-'),
        (worst_returns_df, 'Worst', 'g-', 'r-'),
    ):
        plt.plot(frame['Date'], abs(frame[f'{metric}_HS'] * 100), hs_style,
                 label=f'{prefix}_{metric}_HS')
        plt.plot(frame['Date'], abs(frame[f'{metric}_Parametric'] * 100), par_style,
                 label=f'{prefix}_{metric}_Parametric')
    plt.grid(True)
    plt.xlabel('Date')
    plt.ylabel(ylab)
    plt.title(title)
    plt.legend(bbox_to_anchor=(1.05, 1), borderaxespad=0.)
    locs, labels = plt.xticks()
    plt.show()
# Latest (last-row) VaR / ES as percentages (absolute values, 2 d.p.).
_last = best_returns_df.iloc[-1]
Best_VaR_HS = abs(round(_last["VaR_HS"] * 100, 2))
Best_VaR_Parametric = abs(round(_last["VaR_Parametric"] * 100, 2))
Best_ES_HS = abs(round(_last["ES_HS"] * 100, 2))
Best_ES_Parametric = abs(round(_last["ES_Parametric"] * 100, 2))
print("Best portfolio: 5% VaR HS is " + str(Best_VaR_HS) + "%")
print("Best portfolio: 5% VaR parametric is " + str(Best_VaR_Parametric) + "%")
print("Best portfolio: Expected shortfall HS is " + str(Best_ES_HS) + "%")
print("Best portfolio: Expected shortfall parametric is " + str(Best_ES_Parametric) + "%")
_last = optimised_returns_df.iloc[-1]
Optimised_VaR_HS = abs(round(_last["VaR_HS"] * 100, 2))
Optimised_VaR_Parametric = abs(round(_last["VaR_Parametric"] * 100, 2))
Optimised_ES_HS = abs(round(_last["ES_HS"] * 100, 2))
Optimised_ES_Parametric = abs(round(_last["ES_Parametric"] * 100, 2))
print("Optimised portfolio: 5% VaR HS is " + str(Optimised_VaR_HS) + "%")
print("Optimised portfolio: 5% VaR parametric is " + str(Optimised_VaR_Parametric) + "%")
print("Optimised portfolio: Expected shortfall HS is " + str(Optimised_ES_HS) + "%")
print("Optimised portfolio: Expected shortfall parametric is " + str(Optimised_ES_Parametric) + "%")
_last = worst_returns_df.iloc[-1]
Worst_VaR_HS = abs(round(_last["VaR_HS"] * 100, 2))
Worst_VaR_Parametric = abs(round(_last["VaR_Parametric"] * 100, 2))
Worst_ES_HS = abs(round(_last["ES_HS"] * 100, 2))
Worst_ES_Parametric = abs(round(_last["ES_Parametric"] * 100, 2))
print("Worst portfolio: 5% VaR HS is " + str(Worst_VaR_HS) + "%")
print("Worst portfolio: 5% VaR parametric is " + str(Worst_VaR_Parametric) + "%")
print("Worst portfolio: Expected shortfall HS is " + str(Worst_ES_HS) + "%")
print("Worst portfolio: Expected shortfall parametric is " + str(Worst_ES_Parametric) + "%")
# Overlay the optimised portfolio's daily return (as a loss) on its two
# 95% VaR series, first for the full sample, then zoomed on Jan–Aug 2020.
plt.figure(figsize=(20, 10))
full_dates = optimised_returns_df['Date']
plt.plot(full_dates, abs(optimised_returns_df['VaR_HS'] * 100), 'c-', label='Optimised_VaR_HS')
plt.plot(full_dates, abs(optimised_returns_df['VaR_Parametric'] * 100), 'm-', label='Optimised_VaR_Parametric')
# NOTE(review): abs() folds gains and losses together here, while the zoomed
# plot below uses -log_return — confirm which convention is intended.
plt.plot(full_dates, abs(optimised_returns_df['log_return'] * 100), 'y-', label='Returns (in Loss)')
plt.grid(True)
plt.xlabel('Date')
plt.ylabel('Returns (in Loss) and VaR (95%)')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
locs, labels = plt.xticks()
plt.setp(labels, rotation=45)
plt.title('Returns (in Loss) vs 95% Value-at-Risk')
plt.show()
# Zoom window over the COVID drawdown period.
start_date = '2020-01-01'
end_date = '2020-08-01'
mask2 = (optimised_returns_df['Date'] > start_date) & (optimised_returns_df['Date'] <= end_date)
zoomin_2 = optimised_returns_df[mask2]
plt.figure(figsize=(20, 10))
zoom_dates = zoomin_2['Date']
plt.plot(zoom_dates, abs(zoomin_2['VaR_HS'] * 100), 'o-', label='Optimised_VaR_HS')
plt.plot(zoom_dates, abs(zoomin_2['VaR_Parametric'] * 100), 'r-', label='Optimised_VaR_Parametric')
plt.plot(zoom_dates, -zoomin_2['log_return'] * 100, 'g-', label='Optimised Returns in Loss')
plt.grid(True)
plt.xlabel('Date')
plt.ylabel('Returns (in Loss) and VaR (95%)')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
locs, labels = plt.xticks()
plt.setp(labels, rotation=45)
plt.title('Returns (in Loss) vs 95% Value-at-Risk')
plt.show()
# A VaR exception (failure) occurs when the loss exceeds the estimate, i.e.
# the realised log return falls below the (negative) VaR number — which is
# exactly the condition VaR > log_return.
breach_par = optimised_returns_df['VaR_Parametric'] > optimised_returns_df['log_return']
breach_hs = optimised_returns_df['VaR_HS'] > optimised_returns_df['log_return']
exception_parametric_date = optimised_returns_df[breach_par]['Date']
exception_historical_date = optimised_returns_df[breach_hs]['Date']
exception_date = pd.concat([exception_parametric_date, exception_historical_date]).drop_duplicates().reset_index(drop=True)
# Backtest summary for the parametric 95% VaR.
observations = len(optimised_returns_df)
failures = len(optimised_returns_df[breach_par])
observed_level = 1 - failures / observations
expected = observations * 0.05
print("VaR 95%")
print("VaR level: %s" %(0.95))
print("Observed level: %s" %(observed_level))
print("Observations: %s" %(observations))
print("Failures: %s" %(failures))
print("Expected: %s" %(expected))
# Backtest summary for the historical-simulation 95% VaR
# (same exception definition as the parametric cell above).
observations = len(optimised_returns_df)
hs_breach_mask = optimised_returns_df['VaR_HS'] > optimised_returns_df['log_return']
failures = len(optimised_returns_df[hs_breach_mask])
observed_level = 1 - failures / observations
expected = observations * 0.05
print("VaR 95%")
print("VaR level: %s" %(0.95))
print("Observed level: %s" %(observed_level))
print("Observations: %s" %(observations))
print("Failures: %s" %(failures))
print("Expected: %s" %(expected))
# Binomial backtest of the 95% VaR (parametric first, then historical
# simulation): under H0 the breach count ~ Binomial(T, 0.05). Printed output
# is identical to the two original copy-pasted cells.
from scipy.stats import binom
backtesting_alpha = 0.05  # backtesting confidence interval
for _var_col in ('VaR_Parametric', 'VaR_HS'):
    failures = len(optimised_returns_df[optimised_returns_df[_var_col] > optimised_returns_df['log_return']])
    observations = len(optimised_returns_df)
    observed_level = 1 - failures / observations
    expected = observations * 0.05
    ratio = failures / expected
    p_value_95 = 1 - binom.cdf(failures, observations, 0.05)
    print("P-value of VaR 95%: ", p_value_95)
    if p_value_95 < backtesting_alpha:
        print("Reject null hypothesis")
    else:
        print(f"Not enough evidence to reject null at {backtesting_alpha} backtesting significance level under binomial distribution")
# z-test on the breach count for the 95% parametric VaR: under H0,
# failures ~ N(T*0.05, T*0.05*0.95) (normal approximation to the binomial).
from scipy.stats import norm
# BUGFIX: the original wrapped the boolean mask in a list literal, so
# len([...]) was always 1; index the frame with the mask instead.
failures = len(optimised_returns_df[optimised_returns_df['VaR_Parametric'] > optimised_returns_df['log_return']])
observations = len(optimised_returns_df)
observed_level = 1 - failures / observations
expected = observations * 0.05
ratio = failures / expected
# BUGFIX: `sqrt` was an undefined name (the file only does `import math`).
z_score = (failures - expected) / math.sqrt(expected * 0.95)
p_VaR_95 = 1 - norm.cdf(z_score, 0, 1)
print("z score: ", z_score)
print("P-value of VaR 95%: ", p_VaR_95)
if p_VaR_95 < backtesting_alpha:
    print("Reject null hypothesis")
else:
    print(f"Not enough evidence to reject null {backtesting_alpha} backtesting significance level under normal distribution ")
# z-test on the breach count for the 95% historical-simulation VaR
# (normal approximation to the binomial, as in the parametric cell).
from scipy.stats import norm
# BUGFIX: list-wrapped mask made len(...) always 1; index the frame instead.
failures = len(optimised_returns_df[optimised_returns_df['VaR_HS'] > optimised_returns_df['log_return']])
observations = len(optimised_returns_df)
observed_level = 1 - failures / observations
expected = observations * 0.05
ratio = failures / expected
# BUGFIX: `sqrt` was an undefined name (the file only does `import math`).
z_score = (failures - expected) / math.sqrt(expected * 0.95)
p_VaR_95 = 1 - norm.cdf(z_score, 0, 1)
print("z score: ", z_score)
print("P-value of VaR 95%: ", p_VaR_95)
if p_VaR_95 < backtesting_alpha:
    print("Reject null hypothesis")
else:
    print(f"Not enough evidence to reject null {backtesting_alpha} backtesting significance level under normal distribution ")
# Current sector weights implied by the optimised weights w3b.
for sector in set(sector_mapper.values()):
    total_weight = sum(w for t, w in w3b.items() if sector_mapper[t] == sector)
    print(f"{sector}: {total_weight:.3f}")
# New sector bounds for the re-optimisation.
new_sector_lower = {"Software—Infrastructure": 0.25}
new_sector_upper = {"Consumer Cyclical": 0.07, "Semiconductors": 0.07, "Computer Hardware": 0.15}
# Extended sample through September 2020.
sept_portfolio = pd.DataFrame(get_logre(tickers, start_date='2019-01-01', end_date='2020-09-01'))
sept_portfolio.head()
new_in_sample = "2020-02-28"  # in-sample cut-off (~14 months of data for testing)
is_logre_df = sept_portfolio.loc[:new_in_sample]
oos_logre_df = sept_portfolio.loc[new_in_sample:].iloc[1:]  # starts one day after the cut-off
is_logre_df.head()
# Expected returns: exponentially-weighted mean of daily historical returns
# (higher weight on recent data), plus the sample covariance — both from
# returns data rather than prices.
er_exp = expected_returns.ema_historical_return(is_logre_df, returns_data=True)
s = risk_models.sample_cov(is_logre_df, returns_data=True)
adj = EfficientFrontier(er_exp, s)
adj.add_sector_constraints(sector_mapper, new_sector_lower, new_sector_upper)
# Force a minimum 2% allocation to each mega-cap ticker. The six original
# copy-pasted pairs are replaced by one loop; the index is bound as a lambda
# default so each constraint keeps its own index (a plain closure over the
# loop variable would late-bind and give every lambda the last index).
for _ticker in ("GOOGL", "GOOG", "FB", "AMZN", "MSFT", "AAPL"):
    _idx = adj.tickers.index(_ticker)
    adj.add_constraint(lambda w, i=_idx: w[i] >= 0.02)
# NOTE(review): `objective_functions` is not imported in the visible file
# header — confirm it is imported in an earlier cell.
adj.add_objective(objective_functions.L2_reg, gamma=2)
adj.efficient_return(target_return=0.40)
adjw = adj.clean_weights()
adj_portfolio = (sept_portfolio * adjw).sum(axis=1)
adjw
# NOTE(review): `cm` (matplotlib.cm) is likewise absent from the visible
# header — confirm; plt.get_cmap("RdPu") is an equivalent alternative.
cmap = cm.get_cmap("RdPu")
pd.Series(adjw).plot.pie(figsize=(10, 10), cmap=cmap);
# Compare the re-optimised (adj) portfolio against the original weights w3b.
noadj_portfolio = (sept_portfolio * w3b).sum(axis=1)
new_all_portfolio_returns = pd.concat([noadj_portfolio, adj_portfolio], axis=1)
new_all_portfolio_returns.columns = ["noadj_portfolio", "adj_portfolio"]
new_in_sample_results = new_all_portfolio_returns[:new_in_sample]
new_oos_results = new_all_portfolio_returns[new_in_sample:].iloc[1:]
# In-sample annualised volatility (~252 trading days per year).
ax = round(new_in_sample_results.std(axis=0) * 252 ** 0.5, 2).plot(kind="bar", figsize=(15, 5))
plt.title("In Sample Volatility")
for p in ax.patches:
    ax.annotate(str(p.get_height()), (p.get_x() + 0.15, p.get_height() + 0.001))
# In-sample Sharpe ratio (annualised with 260**0.5, as elsewhere in the file).
ax = round(new_in_sample_results.mean(axis=0) / new_in_sample_results.std(axis=0) * 260 ** 0.5, 2).plot(kind="bar", figsize=(15, 5))
plt.title("In Sample Sharpe Ratios")
for p in ax.patches:
    ax.annotate(str(p.get_height()), (p.get_x() + 0.15, p.get_height() + 0.01))
# Cumulative in-sample returns in percent.
new_in_sample_results[1:].expanding(0).apply(lambda x: sum(x) * 100, raw=True).plot(figsize=(20, 10))
plt.title("In Sample Portfolio Returns")
plt.ylabel("Returns (%)")
plt.axhline(y=0, color='black', linestyle='--')
# Out-of-sample versions of the same three charts.
ax = round(new_oos_results.std(axis=0) * 252 ** 0.5, 2).plot(kind="bar", figsize=(15, 5))
plt.title("Out of Sample Volatility")
for p in ax.patches:
    ax.annotate(str(p.get_height()), (p.get_x() + 0.15, p.get_height() + 0.001))
ax = round(new_oos_results.mean(axis=0) / new_oos_results.std(axis=0) * 260 ** 0.5, 2).plot(kind="bar", figsize=(15, 5))
plt.title("Out of Sample Sharpe Ratios")
for p in ax.patches:
    ax.annotate(str(p.get_height()), (p.get_x() + 0.15, p.get_height() + 0.01))
new_oos_results[1:].expanding(0).apply(lambda x: sum(x) * 100, raw=True).plot(figsize=(20, 10))
plt.title("Out of Sample Portfolio Returns")  # BUGFIX: title typo "OUt"
plt.ylabel("Returns (%)")
plt.axhline(y=0, color='black', linestyle='--')
# NOTE(review): this cell repeats the sector-weight printout, bound
# definitions and September data pull from earlier in the notebook —
# presumably left over from a re-run; confirm whether it can be removed.
for sector in set(sector_mapper.values()):
    total_weight = sum(w for t, w in w3b.items() if sector_mapper[t] == sector)
    print(f"{sector}: {total_weight:.3f}")
new_sector_lower = {"Software—Infrastructure": 0.25}
new_sector_upper = {"Consumer Cyclical": 0.07, "Semiconductors": 0.07, "Computer Hardware": 0.15}
sept_portfolio = pd.DataFrame(get_logre(tickers, start_date='2019-01-01', end_date='2020-09-01'))
sept_portfolio.head()
new_in_sample = "2020-02-28"  # in-sample cut-off (~14 months of data for testing)
is_logre_df = sept_portfolio.loc[:new_in_sample]
oos_logre_df = sept_portfolio.loc[new_in_sample:].iloc[1:]  # starts one day after the cut-off
is_logre_df.head()
# Re-run of the constrained optimisation (duplicate of the cell above;
# er_exp and s are reused from the earlier estimation, not recomputed here).
adj = EfficientFrontier(er_exp, s)
adj.add_sector_constraints(sector_mapper, new_sector_lower, new_sector_upper)
# Minimum 2% weight per mega-cap; the index is bound as a lambda default so
# each constraint keeps its own index (avoids the late-binding closure pitfall).
for _ticker in ("GOOGL", "GOOG", "FB", "AMZN", "MSFT", "AAPL"):
    _idx = adj.tickers.index(_ticker)
    adj.add_constraint(lambda w, i=_idx: w[i] >= 0.02)
# NOTE(review): confirm `objective_functions` is imported in an earlier cell
# (it is absent from the visible file header).
adj.add_objective(objective_functions.L2_reg, gamma=2)
adj.efficient_return(target_return=0.40)
adjw = adj.clean_weights()
adj_portfolio = (sept_portfolio * adjw).sum(axis=1)
adjw
# NOTE(review): confirm matplotlib.cm is imported earlier; plt.get_cmap works too.
cmap = cm.get_cmap("RdPu")
pd.Series(adjw).plot.pie(figsize=(10, 10), cmap=cmap);
# Baseline: the September data under the original (un-readjusted) weights,
# combined with the readjusted portfolio and split at the in-sample cut-off.
noadj_portfolio = (sept_portfolio * w3b).sum(axis=1)
combined = pd.concat([noadj_portfolio, adj_portfolio], axis=1)
combined.columns = ["noadj_portfolio", "adj_portfolio"]
new_all_portfolio_returns = combined
new_in_sample_results = new_all_portfolio_returns[:new_in_sample]
new_oos_results = new_all_portfolio_returns[new_in_sample:].iloc[1:]
# In Sample Results
## Volatility, Sharpe Ratio, Returns
annualised_vol = round(new_in_sample_results.std(axis=0) * 252 ** 0.5, 2)  # ~252 trading days/yr
ax = annualised_vol.plot(kind="bar", figsize=(15, 5))
plt.title("In Sample Volatility")
for bar in ax.patches:
    ax.annotate(str(bar.get_height()), (bar.get_x() + 0.15, bar.get_height() + 0.001))
sharpe = round(new_in_sample_results.mean(axis=0) / new_in_sample_results.std(axis=0) * 260 ** 0.5, 2)
ax = sharpe.plot(kind="bar", figsize=(15, 5))
plt.title("In Sample Sharpe Ratios")
for bar in ax.patches:
    ax.annotate(str(bar.get_height()), (bar.get_x() + 0.15, bar.get_height() + 0.01))
cumulative_pct = new_in_sample_results[1:].expanding(0).apply(lambda x: sum(x) * 100, raw=True)
cumulative_pct.plot(figsize=(20, 10))
plt.title("In Sample Portfolio Returns")
plt.ylabel("Returns (%)")
plt.axhline(y=0, color='black', linestyle='--')
# Out of Sample Results
## Volatility, Sharpe Ratio, Returns
# Annualised out-of-sample volatility (~252 trading days per year).
ax = round(new_oos_results.std(axis=0) * 252 ** 0.5, 2).plot(kind="bar", figsize=(15, 5))
plt.title("Out of Sample Volatility")
for p in ax.patches:
    ax.annotate(str(p.get_height()), (p.get_x() + 0.15, p.get_height() + 0.001))
# Out-of-sample Sharpe ratio (annualised with 260**0.5, as elsewhere in the file).
ax = round(new_oos_results.mean(axis=0) / new_oos_results.std(axis=0) * 260 ** 0.5, 2).plot(kind="bar", figsize=(15, 5))
plt.title("Out of Sample Sharpe Ratios")
for p in ax.patches:
    ax.annotate(str(p.get_height()), (p.get_x() + 0.15, p.get_height() + 0.01))
# Cumulative out-of-sample returns in percent.
new_oos_results[1:].expanding(0).apply(lambda x: sum(x) * 100, raw=True).plot(figsize=(20, 10))
plt.title("Out of Sample Portfolio Returns")  # BUGFIX: title typo "OUt"
plt.ylabel("Returns (%)")
plt.axhline(y=0, color='black', linestyle='--')